// Build, run, and plot (PowerShell syntax; each step runs only if the previous one succeeded):
//   g++ -std=c++17 -O3 -o ./run_test.exe Env.cpp RAMB.cpp AMB.cpp Qul.cpp Qhoeffding.cpp main.cpp; if ($?) { ./run_test.exe; if ($?) { python plot.py } }


#include <iostream>
#include <vector>
#include <string>
#include <random>
#include <cmath>
#include <chrono>
#include <fstream> 
#include <iomanip> 

#include "Env.h"
#include "Qhoeffding.h"
#include "RAMB.h"
#include "AMB.h"
#include "Qul.h"       

/**
 * @brief Writes the per-episode values of four algorithms, for every run, to a CSV file.
 *
 * Layout of the file:
 *   - Header row: "Episode", followed by one column per run for each algorithm in
 *     the order UCB-H, RAMB, AMB, QUL. Columns are named "<ALGO>_<run>" with run
 *     numbers starting at 1.
 *   - One row per episode: the 1-based episode number followed by that episode's
 *     value for every run, in the same column order as the header.
 *
 * The number of runs is taken from @p ucb_h_data and the number of episodes from
 * its first run. Every table must provide at least that many runs, each holding
 * at least that many episodes (extra entries are ignored). If a table is too
 * short, an error is printed to std::cerr and no file is written, instead of
 * reading past the end of a vector.
 *
 * If @p ucb_h_data has no runs, or its first run has no episodes, only the header
 * row is written and a warning is printed.
 *
 * @param ucb_h_data  Values for the "UCB-H" columns, indexed [run][episode].
 * @param ulcb_h_data Values for the "RAMB" columns, indexed [run][episode]
 *                    (the parameter name does not match the column label).
 * @param amb_data    Values for the "AMB" columns, indexed [run][episode].
 * @param qul_data    Values for the "QUL" columns, indexed [run][episode].
 * @param filename    Path of the CSV file to create or overwrite.
 */
void saveDataToCSV(
    const std::vector<std::vector<float>>& ucb_h_data,
    const std::vector<std::vector<float>>& ulcb_h_data,
    const std::vector<std::vector<float>>& amb_data,
    const std::vector<std::vector<float>>& qul_data,
    const std::string& filename) {

    using RunTable = std::vector<std::vector<float>>;

    // The first table defines the grid that every table is read on.
    const std::size_t num_runs = ucb_h_data.size();
    const std::size_t num_episodes = ucb_h_data.empty() ? 0 : ucb_h_data[0].size();

    // True when `table` can be indexed safely at every [run][episode] of the grid.
    const auto covers_grid = [num_runs, num_episodes](const RunTable& table) {
        if (table.size() < num_runs) return false;
        for (std::size_t run = 0; run < num_runs; ++run) {
            if (table[run].size() < num_episodes) return false;
        }
        return true;
    };

    // Validate before touching the file so a bad call never leaves a partial CSV behind.
    if (num_episodes > 0 &&
        !(covers_grid(ucb_h_data) && covers_grid(ulcb_h_data) &&
          covers_grid(amb_data) && covers_grid(qul_data))) {
        std::cerr << "Error: Inconsistent data dimensions; every algorithm must provide at least "
                  << num_runs << " runs of " << num_episodes
                  << " episodes. Nothing was written to " << filename << "." << std::endl;
        return;
    }

    std::ofstream file(filename);
    if (!file.is_open()) {
        std::cerr << "Error: Could not open file " << filename << " for writing." << std::endl;
        return;
    }

    // Column groups in output order; labels and tables are kept in lockstep.
    const char* const labels[] = {"UCB-H", "RAMB", "AMB", "QUL"};
    const RunTable* const tables[] = {&ucb_h_data, &ulcb_h_data, &amb_data, &qul_data};

    // Write header
    file << "Episode";
    for (const char* label : labels) {
        for (std::size_t run = 0; run < num_runs; ++run) {
            file << ',' << label << '_' << run + 1;
        }
    }
    file << '\n';

    if (num_episodes == 0) {
        std::cerr << "Warning: No episode data to write; only the header was saved to "
                  << filename << "." << std::endl;
        return;
    }

    // Write data
    for (std::size_t t = 0; t < num_episodes; ++t) {
        file << t + 1; // Episode number
        for (const RunTable* table : tables) {
            for (std::size_t run = 0; run < num_runs; ++run) {
                file << ',' << (*table)[run][t];
            }
        }
        file << '\n';
    }

    // close() flushes the buffer; a failed flush (e.g. disk full) shows up as failbit here.
    file.close();
    if (file.fail()) {
        std::cerr << "Error: Failed while writing to " << filename << "." << std::endl;
        return;
    }
    std::cout << "Successfully saved all run data to " << filename << std::endl;
}

/**
 * @brief Turns per-episode gaps into the plotted metric: cumulative gap / log(episode + 1).
 *
 * Entry t (0-based) of the result is (raw_gap[0] + ... + raw_gap[t]) / log(t + 2).
 * The running sum is kept in double so that small gaps are not lost once the
 * total has grown large; each stored value is rounded back to float.
 *
 * At most @p total_episodes entries are produced. If @p raw_gap holds fewer
 * values than that, only the available ones are processed and a warning is
 * printed, rather than reading past the end of the vector.
 *
 * @param raw_gap        Per-episode gap values, one per episode.
 * @param total_episodes Number of episodes expected in @p raw_gap.
 * @return The processed series, one entry per processed episode.
 */
static std::vector<float> processRawGap(const std::vector<float>& raw_gap, int total_episodes) {
    const std::size_t wanted = total_episodes > 0 ? static_cast<std::size_t>(total_episodes) : 0;
    const std::size_t count = raw_gap.size() < wanted ? raw_gap.size() : wanted;
    if (count < wanted) {
        std::cerr << "Warning: expected " << wanted << " per-episode gaps but received only "
                  << raw_gap.size() << "; the missing episodes are omitted." << std::endl;
    }

    std::vector<float> processed_regret;
    processed_regret.reserve(count);
    double cumulative_gap = 0.0;
    for (std::size_t t = 0; t < count; ++t) {
        cumulative_gap += raw_gap[t];
        // t is 0-based, so t + 2 keeps the divisor log(episode + 1) strictly positive.
        processed_regret.push_back(
            static_cast<float>(cumulative_gap / std::log(static_cast<double>(t) + 2.0)));
    }
    return processed_regret;
}

/**
 * @brief Runs four Q-learning variants for several seeded runs and saves the results.
 *
 * For each run a fresh generator (seeded with seed + run) and a fresh
 * FiniteStateFiniteActionMDP are created; the four learners are then constructed
 * from that same environment object and run one after another. The fifth value
 * returned by each learner's learn() is converted with processRawGap() and stored.
 * Finally all series are written to "regret_data_all_runs.csv" and the total
 * wall-clock time is printed.
 *
 * @return 0 on completion.
 */
int main() {
    // --- Timer Start ---
    auto start_time = std::chrono::high_resolution_clock::now();

    // --- Setup Parameters ---
    // H, S and A are forwarded to the environment constructor.
    // NOTE(review): presumably horizon, number of states and number of actions - confirm in Env.h.
    const int H = 2;
    const int S = 3;
    const int A = 3;
    const int total_episodes = 100000;
    const float c1 = 1.0f;          // Passed unchanged to every learner's constructor.
    const unsigned int seed = 1;    // Base seed; run r uses seed + r.
    const int num_runs = 10;

    // One processed series per run for each algorithm, indexed [run][episode].
    std::vector<std::vector<float>> all_runs_ucb_h;
    std::vector<std::vector<float>> all_runs_ramb_h;
    std::vector<std::vector<float>> all_runs_amb;
    std::vector<std::vector<float>> all_runs_qul;
    all_runs_ucb_h.reserve(num_runs);
    all_runs_ramb_h.reserve(num_runs);
    all_runs_amb.reserve(num_runs);
    all_runs_qul.reserve(num_runs);

    // --- Main loop for running experiments multiple times ---
    for (int run = 1; run <= num_runs; ++run) {
        // std::endl is kept on progress messages so they appear immediately during long runs.
        std::cout << "\n========================================" << std::endl;
        std::cout << "Starting Run " << run << "/" << num_runs << std::endl;
        std::cout << "========================================" << std::endl;

        // A distinct seed per run; the environment is rebuilt from it every run.
        unsigned int current_seed = seed + run;
        std::mt19937 gen(current_seed);
        FiniteStateFiniteActionMDP mdp_env(H, S, A, gen);

        // Only the fifth element of each learn() result is used below; the other
        // four are bound because structured bindings must name every element.

        // --- Algorithm 1: UCB-H (Qlearning_gen, presumably declared in Qhoeffding.h) ---
        std::cout << "Running UCB-H..." << std::endl;
        Qlearning_gen q_hoeffding(mdp_env, c1, total_episodes);
        auto [best_value1, best_Q1, value1, global_Q1, raw_gap1] = q_hoeffding.learn();
        all_runs_ucb_h.push_back(processRawGap(raw_gap1, total_episodes));
        std::cout << "UCB-H finished for this run." << std::endl;

        // --- Algorithm 2: RAMB (Qlearning_gen_AMBR, presumably declared in RAMB.h) ---
        std::cout << "\nRunning RAMB..." << std::endl;
        Qlearning_gen_AMBR q_ramb(mdp_env, c1, total_episodes);
        auto [best_value2, best_Q2, value2, QU2, raw_gap2] = q_ramb.learn();
        all_runs_ramb_h.push_back(processRawGap(raw_gap2, total_episodes));
        std::cout << "RAMB finished for this run." << std::endl;

        // --- Algorithm 3: AMB (Qlearning_gen_AMB, presumably declared in AMB.h) ---
        std::cout << "\nRunning AMB..." << std::endl;
        Qlearning_gen_AMB q_amb(mdp_env, c1, total_episodes);
        auto [best_value3, best_Q3, value3, QU3, raw_gap3] = q_amb.learn();
        all_runs_amb.push_back(processRawGap(raw_gap3, total_episodes));
        std::cout << "AMB finished for this run." << std::endl;

        // --- Algorithm 4: Qlearning_genul (presumably declared in Qul.h) ---
        // NOTE(review): this algorithm is logged as "ULCB-H" here but its CSV
        // columns are labelled "QUL" - confirm which name is intended.
        std::cout << "\nRunning ULCB-H..." << std::endl;
        Qlearning_genul q_qul(mdp_env, c1, total_episodes);
        auto [best_value4, best_Q4, value4, QU4, raw_gap4] = q_qul.learn();
        all_runs_qul.push_back(processRawGap(raw_gap4, total_episodes));
        std::cout << "ULCB-H finished for this run." << std::endl;
    }

    // --- Save All Data for Plotting ---
    // The CSV has 1 (Episode) + 4 * num_runs columns: UCB-H, RAMB, AMB and QUL, one column per run each.
    saveDataToCSV(all_runs_ucb_h, all_runs_ramb_h, all_runs_amb, all_runs_qul, "regret_data_all_runs.csv");

    // --- Timer End ---
    auto end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_time = end_time - start_time;

    std::cout << "\n----------------------------------------" << std::endl;
    std::cout << "Total execution time for " << num_runs << " runs: "
              << std::fixed << std::setprecision(4) << elapsed_time.count()
              << " seconds" << std::endl;
    std::cout << "----------------------------------------" << std::endl;

    return 0;
}